import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
plt.style.use ("dark_background")
dataset = pd.read_excel('Z-Alizadeh sani dataset.xlsx')
dataset
| Age | Weight | Length | Sex | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | ... | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | VHD | Cath | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 53 | 90 | 175 | Male | 29.387755 | 0 | 1 | 1 | 0 | 0 | ... | 4.7 | 141 | 5700 | 39 | 52 | 261 | 50 | 0 | N | Cad |
| 1 | 67 | 70 | 157 | Fmale | 28.398718 | 0 | 1 | 0 | 0 | 0 | ... | 4.7 | 156 | 7700 | 38 | 55 | 165 | 40 | 4 | N | Cad |
| 2 | 54 | 54 | 164 | Male | 20.077335 | 0 | 0 | 1 | 0 | 0 | ... | 4.7 | 139 | 7400 | 38 | 60 | 230 | 40 | 2 | mild | Cad |
| 3 | 66 | 67 | 158 | Fmale | 26.838648 | 0 | 1 | 0 | 0 | 0 | ... | 4.4 | 142 | 13000 | 18 | 72 | 742 | 55 | 0 | Severe | Normal |
| 4 | 50 | 87 | 153 | Fmale | 37.165193 | 0 | 1 | 0 | 0 | 0 | ... | 4.0 | 140 | 9200 | 55 | 39 | 274 | 50 | 0 | Severe | Normal |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 298 | 58 | 84 | 168 | Male | 29.761905 | 0 | 0 | 0 | 0 | 0 | ... | 4.8 | 146 | 8500 | 34 | 58 | 251 | 45 | 0 | N | Cad |
| 299 | 55 | 64 | 152 | Fmale | 27.700831 | 0 | 0 | 0 | 0 | 0 | ... | 4.0 | 139 | 11400 | 16 | 80 | 377 | 40 | 0 | mild | Normal |
| 300 | 48 | 77 | 160 | Fmale | 30.078125 | 0 | 1 | 0 | 0 | 1 | ... | 4.0 | 140 | 9000 | 35 | 55 | 279 | 55 | 0 | N | Normal |
| 301 | 57 | 90 | 159 | Fmale | 35.599858 | 1 | 0 | 0 | 0 | 0 | ... | 3.8 | 141 | 3800 | 48 | 40 | 208 | 55 | 0 | N | Normal |
| 302 | 56 | 85 | 170 | Fmale | 29.411765 | 0 | 1 | 1 | 0 | 0 | ... | 4.4 | 147 | 6000 | 32 | 55 | 302 | 55 | 0 | N | Cad |
303 rows × 56 columns
dataset.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 56 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 303 non-null int64 1 Weight 303 non-null int64 2 Length 303 non-null int64 3 Sex 303 non-null object 4 BMI 303 non-null float64 5 DM 303 non-null int64 6 HTN 303 non-null int64 7 Current Smoker 303 non-null int64 8 EX-Smoker 303 non-null int64 9 FH 303 non-null int64 10 Obesity 303 non-null object 11 CRF 303 non-null object 12 CVA 303 non-null object 13 Airway disease 303 non-null object 14 Thyroid Disease 303 non-null object 15 CHF 303 non-null object 16 DLP 303 non-null object 17 BP 303 non-null int64 18 PR 303 non-null int64 19 Edema 303 non-null int64 20 Weak Peripheral Pulse 303 non-null object 21 Lung rales 303 non-null object 22 Systolic Murmur 303 non-null object 23 Diastolic Murmur 303 non-null object 24 Typical Chest Pain 303 non-null int64 25 Dyspnea 303 non-null object 26 Function Class 303 non-null int64 27 Atypical 303 non-null object 28 Nonanginal 303 non-null object 29 Exertional CP 303 non-null object 30 LowTH Ang 303 non-null object 31 Q Wave 303 non-null int64 32 St Elevation 303 non-null int64 33 St Depression 303 non-null int64 34 Tinversion 303 non-null int64 35 LVH 303 non-null object 36 Poor R Progression 303 non-null object 37 BBB 303 non-null object 38 FBS 303 non-null int64 39 CR 303 non-null float64 40 TG 303 non-null int64 41 LDL 303 non-null int64 42 HDL 303 non-null float64 43 BUN 303 non-null int64 44 ESR 303 non-null int64 45 HB 303 non-null float64 46 K 303 non-null float64 47 Na 303 non-null int64 48 WBC 303 non-null int64 49 Lymph 303 non-null int64 50 Neut 303 non-null int64 51 PLT 303 non-null int64 52 EF-TTE 303 non-null int64 53 Region RWMA 303 non-null int64 54 VHD 303 non-null object 55 Cath 303 non-null object dtypes: float64(5), int64(29), object(22) memory usage: 132.7+ KB
dataset.isna().sum()
Age 0 Weight 0 Length 0 Sex 0 BMI 0 DM 0 HTN 0 Current Smoker 0 EX-Smoker 0 FH 0 Obesity 0 CRF 0 CVA 0 Airway disease 0 Thyroid Disease 0 CHF 0 DLP 0 BP 0 PR 0 Edema 0 Weak Peripheral Pulse 0 Lung rales 0 Systolic Murmur 0 Diastolic Murmur 0 Typical Chest Pain 0 Dyspnea 0 Function Class 0 Atypical 0 Nonanginal 0 Exertional CP 0 LowTH Ang 0 Q Wave 0 St Elevation 0 St Depression 0 Tinversion 0 LVH 0 Poor R Progression 0 BBB 0 FBS 0 CR 0 TG 0 LDL 0 HDL 0 BUN 0 ESR 0 HB 0 K 0 Na 0 WBC 0 Lymph 0 Neut 0 PLT 0 EF-TTE 0 Region RWMA 0 VHD 0 Cath 0 dtype: int64
sns.heatmap(dataset.isna())
<Axes: >
dataset.describe()
| Age | Weight | Length | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | BP | ... | ESR | HB | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | ... | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 58.897690 | 73.831683 | 164.716172 | 27.248339 | 0.297030 | 0.590759 | 0.207921 | 0.033003 | 0.158416 | 129.554455 | ... | 19.462046 | 13.153465 | 4.230693 | 140.996700 | 7562.046205 | 32.399340 | 60.148515 | 221.488449 | 47.231023 | 0.620462 |
| std | 10.392278 | 11.987358 | 9.327661 | 4.098865 | 0.457706 | 0.492507 | 0.406491 | 0.178941 | 0.365734 | 18.938105 | ... | 15.936475 | 1.610452 | 0.458202 | 3.807885 | 2413.739323 | 9.972592 | 10.182493 | 60.796199 | 8.927194 | 1.132531 |
| min | 30.000000 | 48.000000 | 140.000000 | 18.115413 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 90.000000 | ... | 1.000000 | 8.900000 | 3.000000 | 128.000000 | 3700.000000 | 7.000000 | 32.000000 | 25.000000 | 15.000000 | 0.000000 |
| 25% | 51.000000 | 65.000000 | 158.000000 | 24.514380 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 120.000000 | ... | 9.000000 | 12.200000 | 3.900000 | 139.000000 | 5800.000000 | 26.000000 | 52.500000 | 183.500000 | 45.000000 | 0.000000 |
| 50% | 58.000000 | 74.000000 | 165.000000 | 26.775510 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 130.000000 | ... | 15.000000 | 13.200000 | 4.200000 | 141.000000 | 7100.000000 | 32.000000 | 60.000000 | 210.000000 | 50.000000 | 0.000000 |
| 75% | 66.000000 | 81.000000 | 171.000000 | 29.411765 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 140.000000 | ... | 26.000000 | 14.200000 | 4.500000 | 143.000000 | 8800.000000 | 39.000000 | 67.000000 | 250.000000 | 55.000000 | 1.000000 |
| max | 86.000000 | 120.000000 | 188.000000 | 40.900658 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 190.000000 | ... | 90.000000 | 17.600000 | 6.600000 | 156.000000 | 18000.000000 | 60.000000 | 89.000000 | 742.000000 | 60.000000 | 4.000000 |
8 rows × 34 columns
sns.heatmap(dataset.describe())
<Axes: >
dataset.corr()
/tmp/ipykernel_9901/2191645083.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. dataset.corr()
| Age | Weight | Length | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | BP | ... | ESR | HB | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | 1.000000 | -0.264585 | -0.163753 | -0.161414 | 0.072543 | 0.246690 | -0.143879 | 0.076608 | -0.183900 | 0.215527 | ... | 0.183127 | -0.161018 | 0.154203 | -0.071886 | 0.020398 | -0.171529 | 0.173030 | -0.049500 | -0.140512 | 0.108663 |
| Weight | -0.264585 | 1.000000 | 0.460631 | 0.725005 | -0.003531 | -0.028532 | 0.157385 | 0.068977 | 0.021963 | -0.025930 | ... | -0.139314 | 0.274218 | -0.018287 | 0.013916 | -0.020214 | 0.020120 | -0.051093 | -0.094192 | 0.026789 | -0.007648 |
| Length | -0.163753 | 0.460631 | 1.000000 | -0.269356 | -0.052318 | -0.153668 | 0.335248 | 0.079034 | 0.004488 | -0.072511 | ... | -0.222182 | 0.341028 | -0.086277 | -0.066590 | 0.066658 | -0.156436 | 0.115075 | -0.133446 | -0.093295 | 0.095715 |
| BMI | -0.161414 | 0.725005 | -0.269356 | 1.000000 | 0.045360 | 0.091652 | -0.089398 | 0.005016 | 0.014045 | 0.031916 | ... | 0.023259 | 0.031107 | 0.044587 | 0.067402 | -0.074928 | 0.139583 | -0.140037 | -0.003964 | 0.093903 | -0.079739 |
| DM | 0.072543 | -0.003531 | -0.052318 | 0.045360 | 1.000000 | 0.217864 | -0.208458 | -0.120087 | -0.064434 | 0.128010 | ... | 0.190397 | -0.156382 | 0.100064 | -0.083030 | 0.110345 | 0.033413 | -0.024417 | 0.051054 | -0.052507 | 0.064891 |
| HTN | 0.246690 | -0.028532 | -0.153668 | 0.091652 | 0.217864 | 1.000000 | -0.169000 | 0.041045 | -0.098467 | 0.570418 | ... | 0.161704 | -0.115935 | 0.011826 | 0.036355 | -0.069374 | 0.017204 | -0.025476 | -0.043840 | 0.031365 | -0.000372 |
| Current Smoker | -0.143879 | 0.157385 | 0.335248 | -0.089398 | -0.208458 | -0.169000 | 1.000000 | -0.094652 | 0.089532 | -0.079115 | ... | -0.121199 | 0.216144 | -0.016599 | 0.036812 | 0.046205 | -0.053224 | 0.042115 | -0.048473 | -0.068943 | 0.078479 |
| EX-Smoker | 0.076608 | 0.068977 | 0.079034 | 0.005016 | -0.120087 | 0.041045 | -0.094652 | 1.000000 | -0.080152 | 0.028781 | ... | 0.017858 | -0.024528 | -0.004318 | -0.038716 | -0.057655 | 0.070524 | -0.068122 | -0.079102 | -0.015153 | 0.012996 |
| FH | -0.183900 | 0.021963 | 0.004488 | 0.014045 | -0.064434 | -0.098467 | 0.089532 | -0.080152 | 1.000000 | -0.082999 | ... | -0.061457 | -0.045348 | -0.017255 | -0.113749 | 0.067973 | -0.014679 | 0.040786 | -0.023000 | 0.089157 | -0.038230 |
| BP | 0.215527 | -0.025930 | -0.072511 | 0.031916 | 0.128010 | 0.570418 | -0.079115 | 0.028781 | -0.082999 | 1.000000 | ... | 0.036155 | -0.129196 | 0.033902 | 0.067064 | -0.071686 | -0.005244 | -0.007812 | -0.092516 | -0.047472 | 0.024047 |
| PR | 0.023576 | -0.075468 | -0.077549 | -0.015680 | 0.025350 | 0.124176 | 0.002796 | -0.065240 | -0.057717 | 0.183231 | ... | 0.108768 | -0.070392 | 0.147650 | 0.010357 | 0.080313 | -0.141028 | 0.144888 | -0.066714 | -0.210017 | 0.152990 |
| Edema | 0.132487 | -0.035323 | -0.039241 | -0.009812 | 0.016133 | 0.134600 | -0.062343 | 0.057211 | 0.004589 | 0.085339 | ... | -0.034615 | -0.094113 | 0.015969 | -0.004275 | 0.101513 | -0.025142 | 0.040315 | -0.054050 | -0.079315 | 0.083133 |
| Typical Chest Pain | 0.138387 | -0.002986 | 0.023149 | -0.012911 | 0.105623 | 0.122788 | 0.079987 | 0.058855 | -0.035920 | 0.114926 | ... | 0.073777 | 0.057803 | 0.126926 | -0.026933 | -0.022196 | -0.076830 | 0.065572 | -0.082399 | -0.103957 | 0.177166 |
| Function Class | 0.051424 | 0.040371 | -0.012710 | 0.064736 | 0.086200 | 0.092880 | -0.037824 | 0.024499 | 0.010162 | 0.017544 | ... | 0.096865 | 0.028994 | 0.007217 | -0.055051 | 0.075008 | -0.004590 | -0.012557 | 0.050548 | -0.119119 | 0.131131 |
| Q Wave | -0.061677 | 0.020584 | 0.045229 | -0.015837 | 0.072583 | -0.043583 | -0.084608 | 0.121591 | -0.062024 | -0.033462 | ... | 0.136909 | 0.007752 | 0.048677 | -0.155067 | 0.013517 | -0.055419 | 0.029939 | 0.003692 | -0.266077 | 0.222826 |
| St Elevation | -0.056926 | 0.071406 | 0.050602 | 0.029896 | 0.028955 | -0.040627 | 0.042192 | 0.047341 | -0.009379 | -0.061334 | ... | 0.141827 | 0.065039 | 0.112392 | -0.103195 | 0.128728 | -0.060937 | 0.035447 | 0.070754 | -0.231493 | 0.268545 |
| St Depression | 0.177432 | -0.114619 | -0.150480 | -0.009146 | 0.015532 | 0.016736 | 0.023762 | -0.014970 | 0.080075 | -0.032295 | ... | 0.078451 | -0.124527 | -0.033712 | -0.005668 | 0.107977 | -0.019841 | 0.066265 | 0.119812 | -0.015214 | 0.165025 |
| Tinversion | 0.041913 | -0.021033 | 0.023690 | -0.043311 | -0.058998 | 0.115040 | 0.200881 | 0.082060 | 0.054250 | 0.046643 | ... | 0.081901 | 0.067330 | 0.032172 | -0.024134 | 0.018630 | -0.015916 | -0.003813 | 0.011547 | -0.121389 | 0.173485 |
| FBS | 0.015385 | 0.012737 | -0.094789 | 0.089380 | 0.677940 | 0.109592 | -0.101457 | -0.079537 | -0.080815 | 0.145861 | ... | 0.143823 | -0.164087 | 0.102861 | -0.059455 | 0.159957 | -0.003719 | 0.031787 | 0.019886 | -0.056692 | 0.037291 |
| CR | 0.227097 | 0.150226 | 0.162634 | 0.034338 | 0.028606 | 0.158881 | -0.046339 | 0.178112 | 0.031882 | 0.077407 | ... | 0.023793 | -0.019728 | -0.010450 | -0.074997 | 0.145125 | -0.066620 | 0.096707 | -0.091782 | -0.115351 | 0.031475 |
| TG | -0.110793 | 0.078469 | -0.034389 | 0.109422 | 0.108792 | 0.045954 | 0.062399 | 0.015597 | -0.019083 | 0.019586 | ... | -0.044736 | 0.123914 | 0.023490 | 0.060313 | 0.012340 | 0.090065 | -0.081575 | -0.049424 | -0.027902 | 0.035353 |
| LDL | -0.033576 | -0.023233 | -0.090970 | 0.040001 | -0.027167 | 0.022755 | -0.025440 | -0.025844 | 0.111292 | 0.080683 | ... | -0.013132 | 0.063645 | 0.037732 | 0.168126 | 0.019056 | 0.118307 | -0.085044 | 0.013452 | 0.159394 | -0.026927 |
| HDL | -0.035793 | -0.059713 | -0.050594 | -0.024338 | -0.043890 | -0.094226 | 0.010228 | -0.056676 | 0.078685 | -0.012459 | ... | -0.084301 | -0.048461 | -0.074145 | 0.088912 | -0.063782 | 0.028257 | -0.024528 | 0.000638 | 0.104394 | -0.062022 |
| BUN | 0.300663 | -0.057670 | -0.071229 | -0.011139 | 0.144394 | 0.152895 | -0.061596 | 0.066455 | -0.014419 | 0.038045 | ... | 0.126928 | -0.085335 | 0.098618 | -0.136310 | 0.088416 | -0.044946 | 0.024421 | 0.041289 | -0.116665 | 0.018362 |
| ESR | 0.183127 | -0.139314 | -0.222182 | 0.023259 | 0.190397 | 0.161704 | -0.121199 | 0.017858 | -0.061457 | 0.036155 | ... | 1.000000 | -0.389803 | 0.006577 | -0.069327 | 0.160759 | -0.158031 | 0.138741 | 0.246826 | -0.057497 | 0.054697 |
| HB | -0.161018 | 0.274218 | 0.341028 | 0.031107 | -0.156382 | -0.115935 | 0.216144 | -0.024528 | -0.045348 | -0.129196 | ... | -0.389803 | 1.000000 | 0.033308 | 0.138745 | -0.000567 | 0.083837 | -0.075441 | -0.106252 | 0.006186 | -0.045481 |
| K | 0.154203 | -0.018287 | -0.086277 | 0.044587 | 0.100064 | 0.011826 | -0.016599 | -0.004318 | -0.017255 | 0.033902 | ... | 0.006577 | 0.033308 | 1.000000 | 0.010686 | 0.118689 | -0.008561 | -0.002896 | 0.022865 | -0.159512 | 0.229266 |
| Na | -0.071886 | 0.013916 | -0.066590 | 0.067402 | -0.083030 | 0.036355 | 0.036812 | -0.038716 | -0.113749 | 0.067064 | ... | -0.069327 | 0.138745 | 0.010686 | 1.000000 | -0.093826 | 0.141032 | -0.134406 | -0.022049 | 0.136491 | -0.022558 |
| WBC | 0.020398 | -0.020214 | 0.066658 | -0.074928 | 0.110345 | -0.069374 | 0.046205 | -0.057655 | 0.067973 | -0.071686 | ... | 0.160759 | -0.000567 | 0.118689 | -0.093826 | 1.000000 | -0.322100 | 0.377770 | 0.290805 | -0.137910 | 0.175318 |
| Lymph | -0.171529 | 0.020120 | -0.156436 | 0.139583 | 0.033413 | 0.017204 | -0.053224 | 0.070524 | -0.014679 | -0.005244 | ... | -0.158031 | 0.083837 | -0.008561 | 0.141032 | -0.322100 | 1.000000 | -0.923081 | -0.011639 | 0.239827 | -0.079181 |
| Neut | 0.173030 | -0.051093 | 0.115075 | -0.140037 | -0.024417 | -0.025476 | 0.042115 | -0.068122 | 0.040786 | -0.007812 | ... | 0.138741 | -0.075441 | -0.002896 | -0.134406 | 0.377770 | -0.923081 | 1.000000 | 0.003637 | -0.228776 | 0.112580 |
| PLT | -0.049500 | -0.094192 | -0.133446 | -0.003964 | 0.051054 | -0.043840 | -0.048473 | -0.079102 | -0.023000 | -0.092516 | ... | 0.246826 | -0.106252 | 0.022865 | -0.022049 | 0.290805 | -0.011639 | 0.003637 | 1.000000 | 0.068409 | -0.010812 |
| EF-TTE | -0.140512 | 0.026789 | -0.093295 | 0.093903 | -0.052507 | 0.031365 | -0.068943 | -0.015153 | 0.089157 | -0.047472 | ... | -0.057497 | 0.006186 | -0.159512 | 0.136491 | -0.137910 | 0.239827 | -0.228776 | 0.068409 | 1.000000 | -0.450799 |
| Region RWMA | 0.108663 | -0.007648 | 0.095715 | -0.079739 | 0.064891 | -0.000372 | 0.078479 | 0.012996 | -0.038230 | 0.024047 | ... | 0.054697 | -0.045481 | 0.229266 | -0.022558 | 0.175318 | -0.079181 | 0.112580 | -0.010812 | -0.450799 | 1.000000 |
34 rows × 34 columns
plt.figure(figsize=(40, 20))
sns.heatmap(dataset.corr())
top_corr_features = dataset.corr().index
g=sns.heatmap(dataset[top_corr_features].corr(),annot=True,linewidth=.10,cmap="rocket")
/tmp/ipykernel_9901/3249925955.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. sns.heatmap(dataset.corr()) /tmp/ipykernel_9901/3249925955.py:3: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. top_corr_features = dataset.corr().index
plt.figure(figsize=(40, 20))
matrix = np.triu(dataset.corr())
sns.heatmap(dataset.corr(), annot=True, linewidth=.10, mask=matrix, cmap="Paired");
/tmp/ipykernel_9901/2397469981.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. matrix = np.triu(dataset.corr()) /tmp/ipykernel_9901/2397469981.py:3: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. sns.heatmap(dataset.corr(), annot=True, linewidth=.10, mask=matrix, cmap="Paired");
plt.figure(figsize=(40, 20))
sns.histplot(data=dataset.corr(),kde=True,palette='hot')
/tmp/ipykernel_9901/3055010306.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. sns.histplot(data=dataset.corr(),kde=True,palette='hot')
<Axes: ylabel='Count'>
dataset.nunique()
Age 46 Weight 54 Length 44 Sex 2 BMI 263 DM 2 HTN 2 Current Smoker 2 EX-Smoker 2 FH 2 Obesity 2 CRF 2 CVA 2 Airway disease 2 Thyroid Disease 2 CHF 2 DLP 2 BP 17 PR 21 Edema 2 Weak Peripheral Pulse 2 Lung rales 2 Systolic Murmur 2 Diastolic Murmur 2 Typical Chest Pain 2 Dyspnea 2 Function Class 4 Atypical 2 Nonanginal 2 Exertional CP 1 LowTH Ang 2 Q Wave 2 St Elevation 2 St Depression 2 Tinversion 2 LVH 2 Poor R Progression 2 BBB 3 FBS 113 CR 18 TG 147 LDL 110 HDL 47 BUN 33 ESR 58 HB 66 K 27 Na 25 WBC 78 Lymph 50 Neut 52 PLT 135 EF-TTE 11 Region RWMA 5 VHD 4 Cath 2 dtype: int64
unique = pd.DataFrame(dataset.nunique())
plt.figure(figsize=(15, 10))
sns.heatmap(unique,annot=True, linewidth=.10, cmap="Paired")
<Axes: >
dataset.drop(['Exertional CP'], axis=1, inplace=True)
dataset
| Age | Weight | Length | Sex | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | ... | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | VHD | Cath | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 53 | 90 | 175 | Male | 29.387755 | 0 | 1 | 1 | 0 | 0 | ... | 4.7 | 141 | 5700 | 39 | 52 | 261 | 50 | 0 | N | Cad |
| 1 | 67 | 70 | 157 | Fmale | 28.398718 | 0 | 1 | 0 | 0 | 0 | ... | 4.7 | 156 | 7700 | 38 | 55 | 165 | 40 | 4 | N | Cad |
| 2 | 54 | 54 | 164 | Male | 20.077335 | 0 | 0 | 1 | 0 | 0 | ... | 4.7 | 139 | 7400 | 38 | 60 | 230 | 40 | 2 | mild | Cad |
| 3 | 66 | 67 | 158 | Fmale | 26.838648 | 0 | 1 | 0 | 0 | 0 | ... | 4.4 | 142 | 13000 | 18 | 72 | 742 | 55 | 0 | Severe | Normal |
| 4 | 50 | 87 | 153 | Fmale | 37.165193 | 0 | 1 | 0 | 0 | 0 | ... | 4.0 | 140 | 9200 | 55 | 39 | 274 | 50 | 0 | Severe | Normal |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 298 | 58 | 84 | 168 | Male | 29.761905 | 0 | 0 | 0 | 0 | 0 | ... | 4.8 | 146 | 8500 | 34 | 58 | 251 | 45 | 0 | N | Cad |
| 299 | 55 | 64 | 152 | Fmale | 27.700831 | 0 | 0 | 0 | 0 | 0 | ... | 4.0 | 139 | 11400 | 16 | 80 | 377 | 40 | 0 | mild | Normal |
| 300 | 48 | 77 | 160 | Fmale | 30.078125 | 0 | 1 | 0 | 0 | 1 | ... | 4.0 | 140 | 9000 | 35 | 55 | 279 | 55 | 0 | N | Normal |
| 301 | 57 | 90 | 159 | Fmale | 35.599858 | 1 | 0 | 0 | 0 | 0 | ... | 3.8 | 141 | 3800 | 48 | 40 | 208 | 55 | 0 | N | Normal |
| 302 | 56 | 85 | 170 | Fmale | 29.411765 | 0 | 1 | 1 | 0 | 0 | ... | 4.4 | 147 | 6000 | 32 | 55 | 302 | 55 | 0 | N | Cad |
303 rows × 55 columns
dataset.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 55 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 303 non-null int64 1 Weight 303 non-null int64 2 Length 303 non-null int64 3 Sex 303 non-null object 4 BMI 303 non-null float64 5 DM 303 non-null int64 6 HTN 303 non-null int64 7 Current Smoker 303 non-null int64 8 EX-Smoker 303 non-null int64 9 FH 303 non-null int64 10 Obesity 303 non-null object 11 CRF 303 non-null object 12 CVA 303 non-null object 13 Airway disease 303 non-null object 14 Thyroid Disease 303 non-null object 15 CHF 303 non-null object 16 DLP 303 non-null object 17 BP 303 non-null int64 18 PR 303 non-null int64 19 Edema 303 non-null int64 20 Weak Peripheral Pulse 303 non-null object 21 Lung rales 303 non-null object 22 Systolic Murmur 303 non-null object 23 Diastolic Murmur 303 non-null object 24 Typical Chest Pain 303 non-null int64 25 Dyspnea 303 non-null object 26 Function Class 303 non-null int64 27 Atypical 303 non-null object 28 Nonanginal 303 non-null object 29 LowTH Ang 303 non-null object 30 Q Wave 303 non-null int64 31 St Elevation 303 non-null int64 32 St Depression 303 non-null int64 33 Tinversion 303 non-null int64 34 LVH 303 non-null object 35 Poor R Progression 303 non-null object 36 BBB 303 non-null object 37 FBS 303 non-null int64 38 CR 303 non-null float64 39 TG 303 non-null int64 40 LDL 303 non-null int64 41 HDL 303 non-null float64 42 BUN 303 non-null int64 43 ESR 303 non-null int64 44 HB 303 non-null float64 45 K 303 non-null float64 46 Na 303 non-null int64 47 WBC 303 non-null int64 48 Lymph 303 non-null int64 49 Neut 303 non-null int64 50 PLT 303 non-null int64 51 EF-TTE 303 non-null int64 52 Region RWMA 303 non-null int64 53 VHD 303 non-null object 54 Cath 303 non-null object dtypes: float64(5), int64(29), object(21) memory usage: 130.3+ KB
plt.figure(figsize=(10,5))
sns.histplot(data=dataset['Cath'],kde=True,palette='hot')
/tmp/ipykernel_9901/2585740825.py:2: UserWarning: Ignoring `palette` because no `hue` variable has been assigned. sns.histplot(data=dataset['Cath'],kde=True,palette='hot')
<Axes: xlabel='Cath', ylabel='Count'>
LE = LabelEncoder()
dataset['Sex'] = LE.fit_transform(dataset['Sex'])
dataset['Obesity'] = LE.fit_transform(dataset['Obesity'])
dataset['CRF'] = LE.fit_transform(dataset['CRF'])
dataset['CVA'] = LE.fit_transform(dataset['CVA'])
dataset['Airway disease'] = LE.fit_transform(dataset['Airway disease'])
dataset['Thyroid Disease'] = LE.fit_transform(dataset['Thyroid Disease'])
dataset['CHF'] = LE.fit_transform(dataset['CHF'])
dataset['DLP'] = LE.fit_transform(dataset['DLP'])
dataset['Weak Peripheral Pulse'] = LE.fit_transform(dataset['Weak Peripheral Pulse'])
dataset['Lung rales'] = LE.fit_transform(dataset['Lung rales'])
dataset['Systolic Murmur'] = LE.fit_transform(dataset['Systolic Murmur'])
dataset['Diastolic Murmur'] = LE.fit_transform(dataset['Diastolic Murmur'])
dataset['Dyspnea'] = LE.fit_transform(dataset['Dyspnea'])
dataset['Atypical'] = LE.fit_transform(dataset['Atypical'])
dataset['Nonanginal'] = LE.fit_transform(dataset['Nonanginal'])
dataset['LowTH Ang'] = LE.fit_transform(dataset['LowTH Ang'])
dataset['LVH'] = LE.fit_transform(dataset['LVH'])
dataset['Poor R Progression'] = LE.fit_transform(dataset['Poor R Progression'])
dataset['BBB'] = LE.fit_transform(dataset['BBB'])
dataset['VHD'] = LE.fit_transform(dataset['VHD'])
dataset['Cath'] = LE.fit_transform(dataset['Cath'])
dataset
| Age | Weight | Length | Sex | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | ... | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | VHD | Cath | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 53 | 90 | 175 | 1 | 29.387755 | 0 | 1 | 1 | 0 | 0 | ... | 4.7 | 141 | 5700 | 39 | 52 | 261 | 50 | 0 | 1 | 0 |
| 1 | 67 | 70 | 157 | 0 | 28.398718 | 0 | 1 | 0 | 0 | 0 | ... | 4.7 | 156 | 7700 | 38 | 55 | 165 | 40 | 4 | 1 | 0 |
| 2 | 54 | 54 | 164 | 1 | 20.077335 | 0 | 0 | 1 | 0 | 0 | ... | 4.7 | 139 | 7400 | 38 | 60 | 230 | 40 | 2 | 3 | 0 |
| 3 | 66 | 67 | 158 | 0 | 26.838648 | 0 | 1 | 0 | 0 | 0 | ... | 4.4 | 142 | 13000 | 18 | 72 | 742 | 55 | 0 | 2 | 1 |
| 4 | 50 | 87 | 153 | 0 | 37.165193 | 0 | 1 | 0 | 0 | 0 | ... | 4.0 | 140 | 9200 | 55 | 39 | 274 | 50 | 0 | 2 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 298 | 58 | 84 | 168 | 1 | 29.761905 | 0 | 0 | 0 | 0 | 0 | ... | 4.8 | 146 | 8500 | 34 | 58 | 251 | 45 | 0 | 1 | 0 |
| 299 | 55 | 64 | 152 | 0 | 27.700831 | 0 | 0 | 0 | 0 | 0 | ... | 4.0 | 139 | 11400 | 16 | 80 | 377 | 40 | 0 | 3 | 1 |
| 300 | 48 | 77 | 160 | 0 | 30.078125 | 0 | 1 | 0 | 0 | 1 | ... | 4.0 | 140 | 9000 | 35 | 55 | 279 | 55 | 0 | 1 | 1 |
| 301 | 57 | 90 | 159 | 0 | 35.599858 | 1 | 0 | 0 | 0 | 0 | ... | 3.8 | 141 | 3800 | 48 | 40 | 208 | 55 | 0 | 1 | 1 |
| 302 | 56 | 85 | 170 | 0 | 29.411765 | 0 | 1 | 1 | 0 | 0 | ... | 4.4 | 147 | 6000 | 32 | 55 | 302 | 55 | 0 | 1 | 0 |
303 rows × 55 columns
dataset.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 55 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 303 non-null int64 1 Weight 303 non-null int64 2 Length 303 non-null int64 3 Sex 303 non-null int64 4 BMI 303 non-null float64 5 DM 303 non-null int64 6 HTN 303 non-null int64 7 Current Smoker 303 non-null int64 8 EX-Smoker 303 non-null int64 9 FH 303 non-null int64 10 Obesity 303 non-null int64 11 CRF 303 non-null int64 12 CVA 303 non-null int64 13 Airway disease 303 non-null int64 14 Thyroid Disease 303 non-null int64 15 CHF 303 non-null int64 16 DLP 303 non-null int64 17 BP 303 non-null int64 18 PR 303 non-null int64 19 Edema 303 non-null int64 20 Weak Peripheral Pulse 303 non-null int64 21 Lung rales 303 non-null int64 22 Systolic Murmur 303 non-null int64 23 Diastolic Murmur 303 non-null int64 24 Typical Chest Pain 303 non-null int64 25 Dyspnea 303 non-null int64 26 Function Class 303 non-null int64 27 Atypical 303 non-null int64 28 Nonanginal 303 non-null int64 29 LowTH Ang 303 non-null int64 30 Q Wave 303 non-null int64 31 St Elevation 303 non-null int64 32 St Depression 303 non-null int64 33 Tinversion 303 non-null int64 34 LVH 303 non-null int64 35 Poor R Progression 303 non-null int64 36 BBB 303 non-null int64 37 FBS 303 non-null int64 38 CR 303 non-null float64 39 TG 303 non-null int64 40 LDL 303 non-null int64 41 HDL 303 non-null float64 42 BUN 303 non-null int64 43 ESR 303 non-null int64 44 HB 303 non-null float64 45 K 303 non-null float64 46 Na 303 non-null int64 47 WBC 303 non-null int64 48 Lymph 303 non-null int64 49 Neut 303 non-null int64 50 PLT 303 non-null int64 51 EF-TTE 303 non-null int64 52 Region RWMA 303 non-null int64 53 VHD 303 non-null int64 54 Cath 303 non-null int64 dtypes: float64(5), int64(50) memory usage: 130.3 KB
dataset.describe()
| Age | Weight | Length | Sex | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | ... | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | VHD | Cath | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | ... | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 58.897690 | 73.831683 | 164.716172 | 0.580858 | 27.248339 | 0.297030 | 0.590759 | 0.207921 | 0.033003 | 0.158416 | ... | 4.230693 | 140.996700 | 7562.046205 | 32.399340 | 60.148515 | 221.488449 | 47.231023 | 0.620462 | 1.930693 | 0.287129 |
| std | 10.392278 | 11.987358 | 9.327661 | 0.494235 | 4.098865 | 0.457706 | 0.492507 | 0.406491 | 0.178941 | 0.365734 | ... | 0.458202 | 3.807885 | 2413.739323 | 9.972592 | 10.182493 | 60.796199 | 8.927194 | 1.132531 | 1.109180 | 0.453171 |
| min | 30.000000 | 48.000000 | 140.000000 | 0.000000 | 18.115413 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.000000 | 128.000000 | 3700.000000 | 7.000000 | 32.000000 | 25.000000 | 15.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 51.000000 | 65.000000 | 158.000000 | 0.000000 | 24.514380 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.900000 | 139.000000 | 5800.000000 | 26.000000 | 52.500000 | 183.500000 | 45.000000 | 0.000000 | 1.000000 | 0.000000 |
| 50% | 58.000000 | 74.000000 | 165.000000 | 1.000000 | 26.775510 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 4.200000 | 141.000000 | 7100.000000 | 32.000000 | 60.000000 | 210.000000 | 50.000000 | 0.000000 | 2.000000 | 0.000000 |
| 75% | 66.000000 | 81.000000 | 171.000000 | 1.000000 | 29.411765 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 4.500000 | 143.000000 | 8800.000000 | 39.000000 | 67.000000 | 250.000000 | 55.000000 | 1.000000 | 3.000000 | 1.000000 |
| max | 86.000000 | 120.000000 | 188.000000 | 1.000000 | 40.900658 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 6.600000 | 156.000000 | 18000.000000 | 60.000000 | 89.000000 | 742.000000 | 60.000000 | 4.000000 | 3.000000 | 1.000000 |
8 rows × 55 columns
sns.heatmap(dataset.describe())
<Axes: >
plt.figure(figsize=(40, 20))
sns.heatmap(dataset.corr())
top_corr_features = dataset.corr().index
g=sns.heatmap(dataset[top_corr_features].corr(),annot=True,linewidth=.10,cmap="rocket")
plt.figure(figsize=(40, 20))
matrix = np.triu(dataset.corr())
sns.heatmap(dataset.corr(), annot=True, linewidth=.10, mask=matrix, cmap="Paired");
plt.figure(figsize=(40, 20))
sns.histplot(data=dataset.corr(),kde=True,palette='hot')
<Axes: ylabel='Count'>
plt.figure(figsize=(15, 12))
sns.countplot (dataset['Age'])
<Axes: ylabel='count'>
sns.countplot (dataset['Cath'])
<Axes: ylabel='count'>
dataset
| Age | Weight | Length | Sex | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | ... | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | VHD | Cath | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 53 | 90 | 175 | 1 | 29.387755 | 0 | 1 | 1 | 0 | 0 | ... | 4.7 | 141 | 5700 | 39 | 52 | 261 | 50 | 0 | 1 | 0 |
| 1 | 67 | 70 | 157 | 0 | 28.398718 | 0 | 1 | 0 | 0 | 0 | ... | 4.7 | 156 | 7700 | 38 | 55 | 165 | 40 | 4 | 1 | 0 |
| 2 | 54 | 54 | 164 | 1 | 20.077335 | 0 | 0 | 1 | 0 | 0 | ... | 4.7 | 139 | 7400 | 38 | 60 | 230 | 40 | 2 | 3 | 0 |
| 3 | 66 | 67 | 158 | 0 | 26.838648 | 0 | 1 | 0 | 0 | 0 | ... | 4.4 | 142 | 13000 | 18 | 72 | 742 | 55 | 0 | 2 | 1 |
| 4 | 50 | 87 | 153 | 0 | 37.165193 | 0 | 1 | 0 | 0 | 0 | ... | 4.0 | 140 | 9200 | 55 | 39 | 274 | 50 | 0 | 2 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 298 | 58 | 84 | 168 | 1 | 29.761905 | 0 | 0 | 0 | 0 | 0 | ... | 4.8 | 146 | 8500 | 34 | 58 | 251 | 45 | 0 | 1 | 0 |
| 299 | 55 | 64 | 152 | 0 | 27.700831 | 0 | 0 | 0 | 0 | 0 | ... | 4.0 | 139 | 11400 | 16 | 80 | 377 | 40 | 0 | 3 | 1 |
| 300 | 48 | 77 | 160 | 0 | 30.078125 | 0 | 1 | 0 | 0 | 1 | ... | 4.0 | 140 | 9000 | 35 | 55 | 279 | 55 | 0 | 1 | 1 |
| 301 | 57 | 90 | 159 | 0 | 35.599858 | 1 | 0 | 0 | 0 | 0 | ... | 3.8 | 141 | 3800 | 48 | 40 | 208 | 55 | 0 | 1 | 1 |
| 302 | 56 | 85 | 170 | 0 | 29.411765 | 0 | 1 | 1 | 0 | 0 | ... | 4.4 | 147 | 6000 | 32 | 55 | 302 | 55 | 0 | 1 | 0 |
303 rows × 55 columns
df = dataset['Cath'].values.sum()
df
87
303-87
216
dataset
| Age | Weight | Length | Sex | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | ... | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | VHD | Cath | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 53 | 90 | 175 | 1 | 29.387755 | 0 | 1 | 1 | 0 | 0 | ... | 4.7 | 141 | 5700 | 39 | 52 | 261 | 50 | 0 | 1 | 0 |
| 1 | 67 | 70 | 157 | 0 | 28.398718 | 0 | 1 | 0 | 0 | 0 | ... | 4.7 | 156 | 7700 | 38 | 55 | 165 | 40 | 4 | 1 | 0 |
| 2 | 54 | 54 | 164 | 1 | 20.077335 | 0 | 0 | 1 | 0 | 0 | ... | 4.7 | 139 | 7400 | 38 | 60 | 230 | 40 | 2 | 3 | 0 |
| 3 | 66 | 67 | 158 | 0 | 26.838648 | 0 | 1 | 0 | 0 | 0 | ... | 4.4 | 142 | 13000 | 18 | 72 | 742 | 55 | 0 | 2 | 1 |
| 4 | 50 | 87 | 153 | 0 | 37.165193 | 0 | 1 | 0 | 0 | 0 | ... | 4.0 | 140 | 9200 | 55 | 39 | 274 | 50 | 0 | 2 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 298 | 58 | 84 | 168 | 1 | 29.761905 | 0 | 0 | 0 | 0 | 0 | ... | 4.8 | 146 | 8500 | 34 | 58 | 251 | 45 | 0 | 1 | 0 |
| 299 | 55 | 64 | 152 | 0 | 27.700831 | 0 | 0 | 0 | 0 | 0 | ... | 4.0 | 139 | 11400 | 16 | 80 | 377 | 40 | 0 | 3 | 1 |
| 300 | 48 | 77 | 160 | 0 | 30.078125 | 0 | 1 | 0 | 0 | 1 | ... | 4.0 | 140 | 9000 | 35 | 55 | 279 | 55 | 0 | 1 | 1 |
| 301 | 57 | 90 | 159 | 0 | 35.599858 | 1 | 0 | 0 | 0 | 0 | ... | 3.8 | 141 | 3800 | 48 | 40 | 208 | 55 | 0 | 1 | 1 |
| 302 | 56 | 85 | 170 | 0 | 29.411765 | 0 | 1 | 1 | 0 | 0 | ... | 4.4 | 147 | 6000 | 32 | 55 | 302 | 55 | 0 | 1 | 0 |
303 rows × 55 columns
"""dataset = np.array(dataset)
dataset"""
'dataset = np.array(dataset)\ndataset'
'''SC = StandardScaler()
dataset[:,0:3] = SC.fit_transform(dataset[:,0:3])
dataset[:,4:5] = SC.fit_transform(dataset[:,4:5])
dataset[:,17:19] = SC.fit_transform(dataset[:,17:19])
dataset[:,37:38] = SC.fit_transform(dataset[:,37:38])
dataset[:,39:53] = SC.fit_transform(dataset[:,39:53])
dataset'''
'SC = StandardScaler()\ndataset[:,0:3] = SC.fit_transform(dataset[:,0:3])\ndataset[:,4:5] = SC.fit_transform(dataset[:,4:5])\ndataset[:,17:19] = SC.fit_transform(dataset[:,17:19])\ndataset[:,37:38] = SC.fit_transform(dataset[:,37:38])\ndataset[:,39:53] = SC.fit_transform(dataset[:,39:53])\ndataset'
'''dataset = pd.DataFrame(dataset,columns = ['Age' , 'Weight' ,'Length', 'Sex' , 'BMI' ,'DM', 'HTN' , 'Current Smoker', 'EX-Smoker' , 'FH' , 'Obesity' ,
'CRF','CVA','Airway disease','Thyroid Disease','CHF' , 'DLP' ,'BP', 'PR','Edema' , 'Weak Peripheral Pulse' ,
'Lung rales' , 'Systolic Murmur' , 'Diastolic Murmur','Typical Chest Pain' , 'Dyspnea' ,
'Function Class' ,'Atypical', 'Nonanginal' , 'LowTH Ang' ,'Q Wave' , 'St Elevation' , 'St Depression' ,
'Tinversion' , 'LVH' , 'Poor R Progression' , 'BBB' ,'FBS' , 'CR' , 'TG' , 'LDL' , 'HDL' , 'BUN' , 'ESR' ,
'HB' , 'K' , 'Na' , 'WBC' , 'Lymph' , 'Neut' ,'PLT' , 'EF-TTE' , 'Region RWMA' , 'VHD','Cath'])
dataset'''
"dataset = pd.DataFrame(dataset,columns = ['Age' , 'Weight' ,'Length', 'Sex' , 'BMI' ,'DM', 'HTN' , 'Current Smoker', 'EX-Smoker' , 'FH' , 'Obesity' , \n 'CRF','CVA','Airway disease','Thyroid Disease','CHF' , 'DLP' ,'BP', 'PR','Edema' , 'Weak Peripheral Pulse' , \n 'Lung rales' , 'Systolic Murmur' , 'Diastolic Murmur','Typical Chest Pain' , 'Dyspnea' , \n 'Function Class' ,'Atypical', 'Nonanginal' , 'LowTH Ang' ,'Q Wave' , 'St Elevation' , 'St Depression' , \n 'Tinversion' , 'LVH' , 'Poor R Progression' , 'BBB' ,'FBS' , 'CR' , 'TG' , 'LDL' , 'HDL' , 'BUN' , 'ESR' , \n 'HB' , 'K' , 'Na' , 'WBC' , 'Lymph' , 'Neut' ,'PLT' , 'EF-TTE' , 'Region RWMA' , 'VHD','Cath'])\ndataset"
dataset.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 55 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 303 non-null int64 1 Weight 303 non-null int64 2 Length 303 non-null int64 3 Sex 303 non-null int64 4 BMI 303 non-null float64 5 DM 303 non-null int64 6 HTN 303 non-null int64 7 Current Smoker 303 non-null int64 8 EX-Smoker 303 non-null int64 9 FH 303 non-null int64 10 Obesity 303 non-null int64 11 CRF 303 non-null int64 12 CVA 303 non-null int64 13 Airway disease 303 non-null int64 14 Thyroid Disease 303 non-null int64 15 CHF 303 non-null int64 16 DLP 303 non-null int64 17 BP 303 non-null int64 18 PR 303 non-null int64 19 Edema 303 non-null int64 20 Weak Peripheral Pulse 303 non-null int64 21 Lung rales 303 non-null int64 22 Systolic Murmur 303 non-null int64 23 Diastolic Murmur 303 non-null int64 24 Typical Chest Pain 303 non-null int64 25 Dyspnea 303 non-null int64 26 Function Class 303 non-null int64 27 Atypical 303 non-null int64 28 Nonanginal 303 non-null int64 29 LowTH Ang 303 non-null int64 30 Q Wave 303 non-null int64 31 St Elevation 303 non-null int64 32 St Depression 303 non-null int64 33 Tinversion 303 non-null int64 34 LVH 303 non-null int64 35 Poor R Progression 303 non-null int64 36 BBB 303 non-null int64 37 FBS 303 non-null int64 38 CR 303 non-null float64 39 TG 303 non-null int64 40 LDL 303 non-null int64 41 HDL 303 non-null float64 42 BUN 303 non-null int64 43 ESR 303 non-null int64 44 HB 303 non-null float64 45 K 303 non-null float64 46 Na 303 non-null int64 47 WBC 303 non-null int64 48 Lymph 303 non-null int64 49 Neut 303 non-null int64 50 PLT 303 non-null int64 51 EF-TTE 303 non-null int64 52 Region RWMA 303 non-null int64 53 VHD 303 non-null int64 54 Cath 303 non-null int64 dtypes: float64(5), int64(50) memory usage: 130.3 KB
dataset.describe()
| Age | Weight | Length | Sex | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | ... | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | VHD | Cath | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | ... | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 58.897690 | 73.831683 | 164.716172 | 0.580858 | 27.248339 | 0.297030 | 0.590759 | 0.207921 | 0.033003 | 0.158416 | ... | 4.230693 | 140.996700 | 7562.046205 | 32.399340 | 60.148515 | 221.488449 | 47.231023 | 0.620462 | 1.930693 | 0.287129 |
| std | 10.392278 | 11.987358 | 9.327661 | 0.494235 | 4.098865 | 0.457706 | 0.492507 | 0.406491 | 0.178941 | 0.365734 | ... | 0.458202 | 3.807885 | 2413.739323 | 9.972592 | 10.182493 | 60.796199 | 8.927194 | 1.132531 | 1.109180 | 0.453171 |
| min | 30.000000 | 48.000000 | 140.000000 | 0.000000 | 18.115413 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.000000 | 128.000000 | 3700.000000 | 7.000000 | 32.000000 | 25.000000 | 15.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 51.000000 | 65.000000 | 158.000000 | 0.000000 | 24.514380 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.900000 | 139.000000 | 5800.000000 | 26.000000 | 52.500000 | 183.500000 | 45.000000 | 0.000000 | 1.000000 | 0.000000 |
| 50% | 58.000000 | 74.000000 | 165.000000 | 1.000000 | 26.775510 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 4.200000 | 141.000000 | 7100.000000 | 32.000000 | 60.000000 | 210.000000 | 50.000000 | 0.000000 | 2.000000 | 0.000000 |
| 75% | 66.000000 | 81.000000 | 171.000000 | 1.000000 | 29.411765 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 4.500000 | 143.000000 | 8800.000000 | 39.000000 | 67.000000 | 250.000000 | 55.000000 | 1.000000 | 3.000000 | 1.000000 |
| max | 86.000000 | 120.000000 | 188.000000 | 1.000000 | 40.900658 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 6.600000 | 156.000000 | 18000.000000 | 60.000000 | 89.000000 | 742.000000 | 60.000000 | 4.000000 | 3.000000 | 1.000000 |
8 rows × 55 columns
dataset.corr()
| Age | Weight | Length | Sex | BMI | DM | HTN | Current Smoker | EX-Smoker | FH | ... | K | Na | WBC | Lymph | Neut | PLT | EF-TTE | Region RWMA | VHD | Cath | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | 1.000000 | -0.264585 | -0.163753 | -0.045769 | -0.161414 | 0.072543 | 0.246690 | -0.143879 | 0.076608 | -0.183900 | ... | 0.154203 | -0.071886 | 0.020398 | -0.171529 | 0.173030 | -0.049500 | -0.140512 | 0.108663 | 0.117735 | -0.357247 |
| Weight | -0.264585 | 1.000000 | 0.460631 | 0.234529 | 0.725005 | -0.003531 | -0.028532 | 0.157385 | 0.068977 | 0.021963 | ... | -0.018287 | 0.013916 | -0.020214 | 0.020120 | -0.051093 | -0.094192 | 0.026789 | -0.007648 | -0.092527 | 0.066833 |
| Length | -0.163753 | 0.460631 | 1.000000 | 0.700279 | -0.269356 | -0.052318 | -0.153668 | 0.335248 | 0.079034 | 0.004488 | ... | -0.086277 | -0.066590 | 0.066658 | -0.156436 | 0.115075 | -0.133446 | -0.093295 | 0.095715 | -0.085441 | -0.001024 |
| Sex | -0.045769 | 0.234529 | 0.700279 | 1.000000 | -0.284088 | -0.194348 | -0.149278 | 0.336330 | 0.156932 | -0.071098 | ... | -0.038046 | -0.079913 | 0.086823 | -0.132539 | 0.112422 | -0.143698 | -0.230896 | 0.128955 | -0.059207 | -0.067041 |
| BMI | -0.161414 | 0.725005 | -0.269356 | -0.284088 | 1.000000 | 0.045360 | 0.091652 | -0.089398 | 0.005016 | 0.014045 | ... | 0.044587 | 0.067402 | -0.074928 | 0.139583 | -0.140037 | -0.003964 | 0.093903 | -0.079739 | -0.037365 | 0.078189 |
| DM | 0.072543 | -0.003531 | -0.052318 | -0.194348 | 0.045360 | 1.000000 | 0.217864 | -0.208458 | -0.120087 | -0.064434 | ... | 0.100064 | -0.083030 | 0.110345 | 0.033413 | -0.024417 | 0.051054 | -0.052507 | 0.064891 | 0.001550 | -0.252897 |
| HTN | 0.246690 | -0.028532 | -0.153668 | -0.149278 | 0.091652 | 0.217864 | 1.000000 | -0.169000 | 0.041045 | -0.098467 | ... | 0.011826 | 0.036355 | -0.069374 | 0.017204 | -0.025476 | -0.043840 | 0.031365 | -0.000372 | 0.117629 | -0.287761 |
| Current Smoker | -0.143879 | 0.157385 | 0.335248 | 0.336330 | -0.089398 | -0.208458 | -0.169000 | 1.000000 | -0.094652 | 0.089532 | ... | -0.016599 | 0.036812 | 0.046205 | -0.053224 | 0.042115 | -0.048473 | -0.068943 | 0.078479 | -0.092783 | -0.073504 |
| EX-Smoker | 0.076608 | 0.068977 | 0.079034 | 0.156932 | 0.005016 | -0.120087 | 0.041045 | -0.094652 | 1.000000 | -0.080152 | ... | -0.004318 | -0.038716 | -0.057655 | 0.070524 | -0.068122 | -0.079102 | -0.015153 | 0.012996 | -0.005121 | -0.035578 |
| FH | -0.183900 | 0.021963 | 0.004488 | -0.071098 | 0.014045 | -0.064434 | -0.098467 | 0.089532 | -0.080152 | 1.000000 | ... | -0.017255 | -0.113749 | 0.067973 | -0.014679 | 0.040786 | -0.023000 | 0.089157 | -0.038230 | -0.078958 | -0.035605 |
| Obesity | -0.126190 | 0.547267 | -0.171962 | -0.211808 | 0.712501 | 0.020839 | 0.136482 | -0.050782 | 0.041635 | 0.011288 | ... | 0.001942 | 0.042850 | -0.053290 | 0.128132 | -0.126619 | -0.008876 | 0.104896 | -0.056608 | 0.004043 | 0.022461 |
| CRF | 0.126980 | -0.025713 | -0.033826 | 0.024718 | 0.009386 | 0.114975 | 0.118299 | 0.043924 | 0.106345 | 0.068090 | ... | -0.035429 | -0.031033 | -0.064608 | -0.019977 | -0.002076 | 0.072621 | -0.048870 | -0.057046 | 0.008896 | -0.090205 |
| CVA | 0.026247 | 0.051610 | -0.007180 | 0.005025 | 0.066967 | 0.029189 | 0.055122 | -0.002528 | -0.023930 | 0.014752 | ... | 0.036615 | 0.013742 | 0.036442 | -0.036420 | 0.023592 | -0.049700 | -0.134162 | -0.002344 | 0.008107 | -0.024946 |
| Airway disease | 0.069941 | -0.057719 | 0.004021 | 0.021834 | -0.062781 | 0.028291 | 0.053887 | 0.074474 | -0.035857 | -0.084208 | ... | -0.059309 | -0.018397 | 0.003789 | -0.002468 | -0.013250 | -0.045168 | 0.010807 | 0.018335 | 0.044016 | -0.084179 |
| Thyroid Disease | -0.095886 | 0.033370 | -0.042495 | -0.091986 | 0.068834 | -0.051885 | 0.038634 | -0.078789 | -0.028410 | 0.053614 | ... | -0.029528 | 0.023249 | 0.044358 | -0.021614 | 0.025847 | 0.036405 | 0.060102 | -0.084389 | -0.010214 | 0.048077 |
| CHF | -0.021618 | 0.029659 | 0.014113 | 0.048881 | 0.020205 | -0.037405 | -0.069137 | -0.029482 | -0.010631 | -0.024966 | ... | 0.147091 | -0.166453 | 0.058217 | -0.013867 | 0.055765 | -0.010892 | -0.175818 | 0.171997 | -0.100329 | -0.036520 |
| DLP | 0.128147 | -0.080068 | -0.173428 | -0.277911 | 0.046570 | 0.250364 | 0.108948 | -0.190162 | -0.103196 | 0.060996 | ... | 0.030825 | 0.002463 | 0.103705 | 0.042078 | -0.013205 | 0.029321 | 0.145854 | -0.057397 | -0.007642 | 0.012718 |
| BP | 0.215527 | -0.025930 | -0.072511 | -0.071315 | 0.031916 | 0.128010 | 0.570418 | -0.079115 | 0.028781 | -0.082999 | ... | 0.033902 | 0.067064 | -0.071686 | -0.005244 | -0.007812 | -0.092516 | -0.047472 | 0.024047 | 0.112653 | -0.237762 |
| PR | 0.023576 | -0.075468 | -0.077549 | -0.095459 | -0.015680 | 0.025350 | 0.124176 | 0.002796 | -0.065240 | -0.057717 | ... | 0.147650 | 0.010357 | 0.080313 | -0.141028 | 0.144888 | -0.066714 | -0.210017 | 0.152990 | -0.045900 | -0.168366 |
| Edema | 0.132487 | -0.035323 | -0.039241 | 0.035315 | -0.009812 | 0.016133 | 0.134600 | -0.062343 | 0.057211 | 0.004589 | ... | 0.015969 | -0.004275 | 0.101513 | -0.025142 | 0.040315 | -0.054050 | -0.079315 | 0.083133 | -0.002572 | -0.054069 |
| Weak Peripheral Pulse | 0.153593 | -0.021990 | -0.037781 | -0.047479 | -0.001103 | 0.029189 | 0.107811 | -0.066365 | -0.023930 | -0.056199 | ... | 0.070595 | -0.013517 | 0.033217 | -0.044226 | 0.013398 | -0.018115 | -0.017891 | -0.048169 | 0.031502 | -0.082207 |
| Lung rales | 0.105655 | -0.097527 | -0.031980 | 0.129113 | -0.079006 | -0.048936 | -0.053769 | -0.055963 | -0.035857 | 0.060764 | ... | 0.044835 | -0.101942 | 0.167806 | -0.075130 | 0.087421 | -0.014644 | -0.315855 | 0.158786 | -0.003786 | -0.045179 |
| Systolic Murmur | 0.044819 | -0.092795 | -0.057362 | 0.042723 | -0.059971 | -0.109338 | -0.063208 | -0.012476 | 0.088946 | -0.039506 | ... | 0.032515 | -0.004733 | 0.049072 | -0.001331 | -0.027609 | 0.020184 | -0.228935 | 0.073055 | -0.132077 | 0.004856 |
| Diastolic Murmur | 0.029832 | -0.052770 | 0.007420 | -0.008972 | -0.062039 | -0.028643 | -0.052064 | -0.041738 | -0.032323 | 0.030575 | ... | -0.049987 | -0.061213 | 0.012437 | -0.120269 | 0.094974 | 0.183080 | -0.043798 | -0.078819 | -0.059273 | 0.146777 |
| Typical Chest Pain | 0.138387 | -0.002986 | 0.023149 | 0.036770 | -0.012911 | 0.105623 | 0.122788 | 0.079987 | 0.058855 | -0.035920 | ... | 0.126926 | -0.026933 | -0.022196 | -0.076830 | 0.065572 | -0.082399 | -0.103957 | 0.177166 | 0.115833 | -0.542967 |
| Dyspnea | 0.059379 | -0.067434 | -0.092743 | -0.038180 | 0.015020 | 0.090134 | 0.038359 | -0.095978 | 0.021484 | -0.058742 | ... | -0.035051 | -0.063902 | 0.040498 | 0.011673 | -0.038503 | 0.042101 | -0.132685 | -0.100747 | -0.022281 | 0.125211 |
| Function Class | 0.051424 | 0.040371 | -0.012710 | -0.043835 | 0.064736 | 0.086200 | 0.092880 | -0.037824 | 0.024499 | 0.010162 | ... | 0.007217 | -0.055051 | 0.075008 | -0.004590 | -0.012557 | 0.050548 | -0.119119 | 0.131131 | 0.034511 | -0.097087 |
| Atypical | -0.141722 | -0.012166 | -0.061937 | -0.043794 | 0.028345 | -0.088066 | -0.144666 | -0.058833 | -0.082886 | 0.083629 | ... | -0.113479 | 0.079633 | -0.053956 | 0.125676 | -0.094894 | -0.042610 | 0.162596 | -0.187982 | -0.126360 | 0.415922 |
| Nonanginal | -0.088702 | -0.009010 | 0.005612 | -0.068600 | -0.025488 | -0.088890 | 0.016443 | -0.084608 | 0.038985 | -0.062024 | ... | -0.061006 | 0.023496 | -0.029963 | 0.015727 | -0.032483 | 0.164402 | 0.098194 | -0.090414 | -0.065181 | 0.274184 |
| LowTH Ang | 0.087227 | -0.056749 | -0.071919 | -0.095959 | -0.018212 | -0.052986 | 0.067845 | 0.058668 | -0.015059 | -0.035366 | ... | -0.005469 | 0.160886 | 0.067246 | 0.140009 | -0.121469 | 0.007402 | -0.043270 | 0.171551 | 0.005102 | -0.051733 |
| Q Wave | -0.061677 | 0.020584 | 0.045229 | 0.051031 | -0.015837 | 0.072583 | -0.043583 | -0.084608 | 0.121591 | -0.062024 | ... | 0.048677 | -0.155067 | 0.013517 | -0.055419 | 0.029939 | 0.003692 | -0.266077 | 0.222826 | 0.028104 | -0.149848 |
| St Elevation | -0.056926 | 0.071406 | 0.050602 | 0.091379 | 0.029896 | 0.028955 | -0.040627 | 0.042192 | 0.047341 | -0.009379 | ... | 0.112392 | -0.103195 | 0.128728 | -0.060937 | 0.035447 | 0.070754 | -0.231493 | 0.268545 | 0.127353 | -0.139684 |
| St Depression | 0.177432 | -0.114619 | -0.150480 | -0.114342 | -0.009146 | 0.015532 | 0.016736 | 0.023762 | -0.014970 | 0.080075 | ... | -0.033712 | -0.005668 | 0.107977 | -0.019841 | 0.066265 | 0.119812 | -0.015214 | 0.165025 | -0.042775 | -0.144426 |
| Tinversion | 0.041913 | -0.021033 | 0.023690 | 0.054493 | -0.043311 | -0.058998 | 0.115040 | 0.200881 | 0.082060 | 0.054250 | ... | 0.032172 | -0.024134 | 0.018630 | -0.015916 | -0.003813 | 0.011547 | -0.121389 | 0.173485 | 0.001550 | -0.236933 |
| LVH | 0.125612 | 0.100367 | -0.007599 | -0.016626 | 0.110330 | -0.056449 | 0.221262 | -0.103449 | 0.025293 | -0.115338 | ... | 0.011220 | 0.084145 | -0.005190 | -0.028019 | -0.027420 | 0.014723 | 0.030394 | -0.051835 | 0.052649 | -0.051196 |
| Poor R Progression | 0.003599 | -0.106376 | -0.053120 | -0.048372 | -0.081253 | 0.269163 | 0.027011 | -0.041738 | -0.032323 | 0.083817 | ... | -0.007490 | -0.050986 | 0.010823 | -0.092933 | 0.052902 | -0.078598 | -0.087423 | -0.044432 | 0.010951 | -0.111040 |
| BBB | 0.005434 | -0.033420 | 0.058785 | -0.002436 | -0.072387 | -0.014153 | -0.052273 | -0.029726 | 0.011603 | -0.007153 | ... | -0.036975 | 0.075941 | -0.042690 | 0.002519 | -0.016381 | -0.090139 | 0.128472 | -0.076629 | -0.015274 | -0.043433 |
| FBS | 0.015385 | 0.012737 | -0.094789 | -0.217349 | 0.089380 | 0.677940 | 0.109592 | -0.101457 | -0.079537 | -0.080815 | ... | 0.102861 | -0.059455 | 0.159957 | -0.003719 | 0.031787 | 0.019886 | -0.056692 | 0.037291 | 0.046482 | -0.205553 |
| CR | 0.227097 | 0.150226 | 0.162634 | 0.260150 | 0.034338 | 0.028606 | 0.158881 | -0.046339 | 0.178112 | 0.031882 | ... | -0.010450 | -0.074997 | 0.145125 | -0.066620 | 0.096707 | -0.091782 | -0.115351 | 0.031475 | 0.032393 | -0.086758 |
| TG | -0.110793 | 0.078469 | -0.034389 | -0.016784 | 0.109422 | 0.108792 | 0.045954 | 0.062399 | 0.015597 | -0.019083 | ... | 0.023490 | 0.060313 | 0.012340 | 0.090065 | -0.081575 | -0.049424 | -0.027902 | 0.035353 | -0.010386 | -0.140593 |
| LDL | -0.033576 | -0.023233 | -0.090970 | -0.104153 | 0.040001 | -0.027167 | 0.022755 | -0.025440 | -0.025844 | 0.111292 | ... | 0.037732 | 0.168126 | 0.019056 | 0.118307 | -0.085044 | 0.013452 | 0.159394 | -0.026927 | -0.031837 | 0.023535 |
| HDL | -0.035793 | -0.059713 | -0.050594 | -0.116294 | -0.024338 | -0.043890 | -0.094226 | 0.010228 | -0.056676 | 0.078685 | ... | -0.074145 | 0.088912 | -0.063782 | 0.028257 | -0.024528 | 0.000638 | 0.104394 | -0.062022 | -0.130615 | 0.042587 |
| BUN | 0.300663 | -0.057670 | -0.071229 | 0.009351 | -0.011139 | 0.144394 | 0.152895 | -0.061596 | 0.066455 | -0.014419 | ... | 0.098618 | -0.136310 | 0.088416 | -0.044946 | 0.024421 | 0.041289 | -0.116665 | 0.018362 | 0.066315 | -0.088903 |
| ESR | 0.183127 | -0.139314 | -0.222182 | -0.306189 | 0.023259 | 0.190397 | 0.161704 | -0.121199 | 0.017858 | -0.061457 | ... | 0.006577 | -0.069327 | 0.160759 | -0.158031 | 0.138741 | 0.246826 | -0.057497 | 0.054697 | 0.038346 | -0.178447 |
| HB | -0.161018 | 0.274218 | 0.341028 | 0.403496 | 0.031107 | -0.156382 | -0.115935 | 0.216144 | -0.024528 | -0.045348 | ... | 0.033308 | 0.138745 | -0.000567 | 0.083837 | -0.075441 | -0.106252 | 0.006186 | -0.045481 | -0.017939 | 0.042416 |
| K | 0.154203 | -0.018287 | -0.086277 | -0.038046 | 0.044587 | 0.100064 | 0.011826 | -0.016599 | -0.004318 | -0.017255 | ... | 1.000000 | 0.010686 | 0.118689 | -0.008561 | -0.002896 | 0.022865 | -0.159512 | 0.229266 | -0.040756 | -0.181320 |
| Na | -0.071886 | 0.013916 | -0.066590 | -0.079913 | 0.067402 | -0.083030 | 0.036355 | 0.036812 | -0.038716 | -0.113749 | ... | 0.010686 | 1.000000 | -0.093826 | 0.141032 | -0.134406 | -0.022049 | 0.136491 | -0.022558 | -0.031414 | 0.084982 |
| WBC | 0.020398 | -0.020214 | 0.066658 | 0.086823 | -0.074928 | 0.110345 | -0.069374 | 0.046205 | -0.057655 | 0.067973 | ... | 0.118689 | -0.093826 | 1.000000 | -0.322100 | 0.377770 | 0.290805 | -0.137910 | 0.175318 | -0.011746 | -0.070830 |
| Lymph | -0.171529 | 0.020120 | -0.156436 | -0.132539 | 0.139583 | 0.033413 | 0.017204 | -0.053224 | 0.070524 | -0.014679 | ... | -0.008561 | 0.141032 | -0.322100 | 1.000000 | -0.923081 | -0.011639 | 0.239827 | -0.079181 | -0.037004 | 0.126945 |
| Neut | 0.173030 | -0.051093 | 0.115075 | 0.112422 | -0.140037 | -0.024417 | -0.025476 | 0.042115 | -0.068122 | 0.040786 | ... | -0.002896 | -0.134406 | 0.377770 | -0.923081 | 1.000000 | 0.003637 | -0.228776 | 0.112580 | 0.014987 | -0.124086 |
| PLT | -0.049500 | -0.094192 | -0.133446 | -0.143698 | -0.003964 | 0.051054 | -0.043840 | -0.048473 | -0.079102 | -0.023000 | ... | 0.022865 | -0.022049 | 0.290805 | -0.011639 | 0.003637 | 1.000000 | 0.068409 | -0.010812 | 0.021471 | 0.094888 |
| EF-TTE | -0.140512 | 0.026789 | -0.093295 | -0.230896 | 0.093903 | -0.052507 | 0.031365 | -0.068943 | -0.015153 | 0.089157 | ... | -0.159512 | 0.136491 | -0.137910 | 0.239827 | -0.228776 | 0.068409 | 1.000000 | -0.450799 | 0.083887 | 0.234009 |
| Region RWMA | 0.108663 | -0.007648 | 0.095715 | 0.128955 | -0.079739 | 0.064891 | -0.000372 | 0.078479 | 0.012996 | -0.038230 | ... | 0.229266 | -0.022558 | 0.175318 | -0.079181 | 0.112580 | -0.010812 | -0.450799 | 1.000000 | 0.005350 | -0.316011 |
| VHD | 0.117735 | -0.092527 | -0.085441 | -0.059207 | -0.037365 | 0.001550 | 0.117629 | -0.092783 | -0.005121 | -0.078958 | ... | -0.040756 | -0.031414 | -0.011746 | -0.037004 | 0.014987 | 0.021471 | 0.083887 | 0.005350 | 1.000000 | -0.065681 |
| Cath | -0.357247 | 0.066833 | -0.001024 | -0.067041 | 0.078189 | -0.252897 | -0.287761 | -0.073504 | -0.035578 | -0.035605 | ... | -0.181320 | 0.084982 | -0.070830 | 0.126945 | -0.124086 | 0.094888 | 0.234009 | -0.316011 | -0.065681 | 1.000000 |
55 rows × 55 columns
plt.figure(figsize=(40, 20))
matrix = np.triu(dataset.corr())
sns.heatmap(dataset.corr(), annot=True, linewidth=.10, mask=matrix, cmap="Paired");
x = dataset.iloc[:,:-1].values
x
array([[ 53., 90., 175., ..., 50., 0., 1.],
[ 67., 70., 157., ..., 40., 4., 1.],
[ 54., 54., 164., ..., 40., 2., 3.],
...,
[ 48., 77., 160., ..., 55., 0., 1.],
[ 57., 90., 159., ..., 55., 0., 1.],
[ 56., 85., 170., ..., 55., 0., 1.]])
y = dataset.iloc[:,-1].values
y
array([0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1,
0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0])
"""from sklearn.decomposition import PCA
pca = PCA()
x = pca.fit_transform(x)
x"""
'from sklearn.decomposition import PCA\npca = PCA()\nx = pca.fit_transform(x)\nx'
x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.1,random_state = 0)
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import xgboost as xgb
from catboost import CatBoostClassifier
#!pip install catboost
svc = SVC(kernel='linear', max_iter=500)
svc.fit(x_train,y_train)
/home/mahmoudragab/anaconda3/lib/python3.10/site-packages/sklearn/svm/_base.py:299: ConvergenceWarning: Solver terminated early (max_iter=500). Consider pre-processing your data with StandardScaler or MinMaxScaler. warnings.warn(
SVC(kernel='linear', max_iter=500)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(kernel='linear', max_iter=500)
y_pred = svc.predict(x_test)
y_pred
array([1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
1, 1, 0, 0, 1, 0, 1, 1, 1])
y_test
array([0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 0, 1])
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.57 0.40 0.47 20
1 0.29 0.45 0.36 11
accuracy 0.42 31
macro avg 0.43 0.43 0.41 31
weighted avg 0.47 0.42 0.43 31
CM = confusion_matrix (y_test,y_pred)
sns.heatmap(CM,annot=True,
fmt='g',
xticklabels=['CAD','Normal'],
yticklabels=['CAD','Normal'])
print (CM)
[[ 8 12] [ 6 5]]
df_comp = pd.DataFrame({'Actual':y_test , 'Predict':y_pred})
df_comp
| Actual | Predict | |
|---|---|---|
| 0 | 0 | 1 |
| 1 | 0 | 1 |
| 2 | 0 | 1 |
| 3 | 1 | 1 |
| 4 | 0 | 1 |
| 5 | 0 | 0 |
| 6 | 1 | 1 |
| 7 | 1 | 0 |
| 8 | 0 | 1 |
| 9 | 1 | 0 |
| 10 | 0 | 1 |
| 11 | 0 | 0 |
| 12 | 1 | 0 |
| 13 | 0 | 0 |
| 14 | 0 | 0 |
| 15 | 0 | 1 |
| 16 | 0 | 0 |
| 17 | 0 | 0 |
| 18 | 0 | 1 |
| 19 | 0 | 0 |
| 20 | 0 | 0 |
| 21 | 1 | 1 |
| 22 | 0 | 1 |
| 23 | 1 | 1 |
| 24 | 1 | 0 |
| 25 | 1 | 0 |
| 26 | 0 | 1 |
| 27 | 1 | 0 |
| 28 | 0 | 1 |
| 29 | 0 | 1 |
| 30 | 1 | 1 |
print ('y_test = ',y_test.sum())
print ('y_pred = ',y_pred.sum())
y_test = 11 y_pred = 17
plt.title (' Actual & Predict ',color = 'r')
sns.heatmap(df_comp)
<Axes: title={'center': ' Actual & Predict '}>
plt.style.use('default')
plt.figure(figsize=(12,7))
data = [87,13]
names = ['Correct Result','Wrong Result']
plt.title (' Accuracy ',color = 'black')
plt.pie (data,labels = names,labeldistance = 1.1,startangle = 90,colors = ['g','y'],autopct = '%1.0f%%',)
([<matplotlib.patches.Wedge at 0x7faec567e560>, <matplotlib.patches.Wedge at 0x7faec567e4a0>], [Text(-0.4368626645752139, -1.0095300947967052, 'Correct Result'), Text(0.436862664575214, 1.009530094796705, 'Wrong Result')], [Text(-0.23828872613193483, -0.550652778980021, '87%'), Text(0.23828872613193489, 0.5506527789800209, '13%')])
RFC = RandomForestClassifier(n_estimators = 250,max_features = 30)
RFC.fit(x_train,y_train)
RandomForestClassifier(max_features=30, n_estimators=250)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(max_features=30, n_estimators=250)
y_pred = RFC.predict(x_test)
y_pred
array([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 1, 0, 0, 1])
y_test
array([0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 0, 1])
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.87 1.00 0.93 20
1 1.00 0.73 0.84 11
accuracy 0.90 31
macro avg 0.93 0.86 0.89 31
weighted avg 0.92 0.90 0.90 31
plt.style.use ("dark_background")
CM = confusion_matrix (y_test,y_pred)
sns.heatmap(CM,annot=True,
fmt='g',
xticklabels=['CAD','Normal'],
yticklabels=['CAD','Normal'])
print (CM)
[[20 0] [ 3 8]]
df_comp = pd.DataFrame({'Actual':y_test , 'Predict':y_pred})
df_comp
| Actual | Predict | |
|---|---|---|
| 0 | 0 | 0 |
| 1 | 0 | 0 |
| 2 | 0 | 0 |
| 3 | 1 | 1 |
| 4 | 0 | 0 |
| 5 | 0 | 0 |
| 6 | 1 | 0 |
| 7 | 1 | 1 |
| 8 | 0 | 0 |
| 9 | 1 | 1 |
| 10 | 0 | 0 |
| 11 | 0 | 0 |
| 12 | 1 | 1 |
| 13 | 0 | 0 |
| 14 | 0 | 0 |
| 15 | 0 | 0 |
| 16 | 0 | 0 |
| 17 | 0 | 0 |
| 18 | 0 | 0 |
| 19 | 0 | 0 |
| 20 | 0 | 0 |
| 21 | 1 | 1 |
| 22 | 0 | 0 |
| 23 | 1 | 1 |
| 24 | 1 | 0 |
| 25 | 1 | 0 |
| 26 | 0 | 0 |
| 27 | 1 | 1 |
| 28 | 0 | 0 |
| 29 | 0 | 0 |
| 30 | 1 | 1 |
plt.title (' Actual & Predict ',color = 'r')
sns.heatmap(df_comp)
<Axes: title={'center': ' Actual & Predict '}>
plt.style.use('default')
plt.figure(figsize=(12,7))
data = [90,10]
names = ['Correct Result','Wrong Result']
plt.title (' Accuracy ',color = 'black')
plt.pie (data,labels = names,labeldistance = 1.1,startangle = 90,colors = ['g','y'],autopct = '%1.0f%%',)
([<matplotlib.patches.Wedge at 0x7faec5471db0>, <matplotlib.patches.Wedge at 0x7faec5471cf0>], [Text(-0.33991877217145866, -1.046162142464278, 'Correct Result'), Text(0.3399188456330338, 1.0461621185951564, 'Wrong Result')], [Text(-0.18541023936625015, -0.5706338958896061, '90%'), Text(0.18541027943620023, 0.5706338828700852, '10%')])
xgb_model = xgb.XGBClassifier(objective="binary:logistic",learning_rate=0.01,max_depth=3,n_estimators=100,subsample=0.9,colsample_bytree=0.9)
xgb_model.fit(x_train,y_train)
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=0.9, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=0.01, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=3, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=0.9, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=0.01, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=3, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=None, ...)y_pred = xgb_model.predict(x_test)
y_pred
array([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 0, 1])
y_test
array([0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 0, 1])
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.91 1.00 0.95 20
1 1.00 0.82 0.90 11
accuracy 0.94 31
macro avg 0.95 0.91 0.93 31
weighted avg 0.94 0.94 0.93 31
CBC = CatBoostClassifier(iterations=100, learning_rate=0.01, depth=5, loss_function='Logloss',verbose=100)
CBC.fit(x_train,y_train)
0: learn: 0.6852223 total: 3.07ms remaining: 304ms 99: learn: 0.3468872 total: 154ms remaining: 0us
<catboost.core.CatBoostClassifier at 0x7faec5448a30>
y_pred = CBC.predict(x_test)
y_pred
array([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 0, 1])
y_test
array([0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 1, 0, 1, 0, 0, 1])
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.87 1.00 0.93 20
1 1.00 0.73 0.84 11
accuracy 0.90 31
macro avg 0.93 0.86 0.89 31
weighted avg 0.92 0.90 0.90 31
from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(x_train, y_train)
CBC = CatBoostClassifier(iterations=100, learning_rate=0.01, depth=5, loss_function='Logloss',verbose=100)
CBC.fit(X_res,y_res)
0: learn: 0.6865856 total: 1.7ms remaining: 168ms 99: learn: 0.3083666 total: 238ms remaining: 0us
<catboost.core.CatBoostClassifier at 0x7faec5421390>
y_pred = CBC.predict(x_test)
y_pred
array([0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 0, 1])
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.90 0.95 0.93 20
1 0.90 0.82 0.86 11
accuracy 0.90 31
macro avg 0.90 0.88 0.89 31
weighted avg 0.90 0.90 0.90 31
import numpy as np
from sklearn.neighbors import NearestNeighbors
class SMOTE2:
def __init__(self, k_neighbors=5, random_state=None):
self.k = k_neighbors
self.rng = np.random.RandomState(random_state)
def _generate_samples(self, X, y, minority_class):
# find the k nearest neighbors of each minority class sample
nn = NearestNeighbors(n_neighbors=self.k).fit(X)
minority_indices = np.where(y == minority_class)[0]
nn_distances, nn_indices = nn.kneighbors(X[minority_indices])
# randomly choose neighbors and generate synthetic samples
n_minority = len(minority_indices)
n_synthetic = int(n_minority * self.ratio) - n_minority
synthetic_indices = np.zeros((n_synthetic, X.shape[1]))
for i in range(n_synthetic):
j = self.rng.randint(n_minority)
nn = nn_indices[j, self.rng.randint(1, self.k)]
gap = self.rng.rand(X.shape[1])
synthetic_indices[i] = X[minority_indices[j]] + gap * (X[nn] - X[minority_indices[j]])
return synthetic_indices
def fit_resample(self, X, y, ratio=1.0):
self.ratio = ratio
unique_classes = np.unique(y)
X_resampled = np.copy(X)
y_resampled = np.copy(y)
for minority_class in unique_classes:
if minority_class == np.argmax(np.bincount(y)):
continue
synthetic_indices = self._generate_samples(X, y, minority_class)
X_resampled = np.vstack([X_resampled, synthetic_indices])
y_resampled = np.hstack([y_resampled, np.full((len(synthetic_indices), ), minority_class)])
return X_resampled, y_resampled
sm = SMOTE2(random_state=42)
X_res, y_res = sm.fit_resample(x_train, y_train.astype(int))
CBC = CatBoostClassifier(iterations=100, learning_rate=0.01, depth=5, loss_function='Logloss',verbose=100)
CBC.fit(X_res,y_res)
y_pred = CBC.predict(x_test)
y_pred
0: learn: 0.6852223 total: 1.17ms remaining: 116ms 99: learn: 0.3468872 total: 150ms remaining: 0us
array([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 0, 1])
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.87 1.00 0.93 20
1 1.00 0.73 0.84 11
accuracy 0.90 31
macro avg 0.93 0.86 0.89 31
weighted avg 0.92 0.90 0.90 31
RFC = RandomForestClassifier(n_estimators=250,max_features = 55,max_depth=2, random_state=42,max_leaf_nodes=15)
RFC.fit(X_res,y_res)
y_pred = RFC.predict(x_test)
y_pred
array([0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 1, 0, 0, 1])
print(classification_report(y_test,y_pred))
precision recall f1-score support
0 0.87 1.00 0.93 20
1 1.00 0.73 0.84 11
accuracy 0.90 31
macro avg 0.93 0.86 0.89 31
weighted avg 0.92 0.90 0.90 31
from imblearn.combine import SMOTETomek
smote_tomek = SMOTETomek(random_state=42)
X_train_resampled, y_train_resampled = smote_tomek.fit_resample(x_train, y_train)
RFC = RandomForestClassifier(n_estimators=250,max_features = 55,max_depth=5, random_state=42,max_leaf_nodes=8)
RFC.fit(X_train_resampled,y_train_resampled)
y_pred = RFC.predict(x_test)
y_pred
array([0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 0, 1])
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 0.90 0.90 0.90 20
1 0.82 0.82 0.82 11
accuracy 0.87 31
macro avg 0.86 0.86 0.86 31
weighted avg 0.87 0.87 0.87 31
# train logistic regression model on resampled data
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(random_state=42)
lr.fit(X_train_resampled, y_train_resampled)
# evaluate model on testing data
y_pred = lr.predict(x_test)
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 0.81 0.65 0.72 20
1 0.53 0.73 0.62 11
accuracy 0.68 31
macro avg 0.67 0.69 0.67 31
weighted avg 0.71 0.68 0.68 31
/home/mahmoudragab/anaconda3/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
import lightgbm as lgb
clf = lgb.LGBMClassifier()
clf.fit(X_train_resampled, y_train_resampled)
y_pred=clf.predict(x_test)
y_pred
array([0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 0, 1])
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 0.90 0.95 0.93 20
1 0.90 0.82 0.86 11
accuracy 0.90 31
macro avg 0.90 0.88 0.89 31
weighted avg 0.90 0.90 0.90 31
#**************************************************************************************************************
from imblearn.over_sampling import SMOTE
from sklearn.datasets import make_classification
from collections import Counter
''# Generate a synthetic imbalanced dataset
x, y = make_classification(n_samples=1000, n_features=20, n_informative=2,
n_redundant=10, n_classes=2, weights=[0.99],
flip_y=0, random_state=1)
print("Before oversampling:", Counter(y))
''
Before oversampling: Counter({0: 990, 1: 10})
''
# Apply SMOTE
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(x, y)
print("After oversampling:", Counter(y_resampled))
After oversampling: Counter({0: 990, 1: 990})
x_train , x_test , y_train , y_test = train_test_split(X_resampled,y_resampled,test_size=0.1,random_state = 0)
xgb_model_2 = xgb.XGBClassifier(objective="binary:logistic",learning_rate=0.01,max_depth=3,n_estimators=100,subsample=0.9,colsample_bytree=0.9)
xgb_model_2.fit(x_train,y_train)
y_pred = xgb_model_2.predict(x_test)
y_pred
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0,
0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1,
1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1,
1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0,
1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1,
1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1])
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 1.00 0.93 0.96 100
1 0.93 1.00 0.97 98
accuracy 0.96 198
macro avg 0.97 0.97 0.96 198
weighted avg 0.97 0.96 0.96 198
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, y_pred)
print("Model Accuracy =",acc*100,"%")
Model Accuracy = 96.46464646464646 %
cm = confusion_matrix(y_test, y_pred)
%matplotlib inline
# Plot confusion matrix
class_names = ['Cad','Normal']
df_cm = pd.DataFrame(cm, index = [i for i in class_names], columns = [i for i in class_names])
sns.heatmap(df_cm, annot = True)
cmap = plt.cm.Blues
plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
# Model Accuracy
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, y_pred)
print("Model Accuracy =",acc*100,"%")
Model Accuracy = 96.46464646464646 %
CBC = CatBoostClassifier(iterations=100, learning_rate=0.01, depth=5, loss_function='Logloss',verbose=100)
CBC.fit(x_train,y_train)
y_pred = CBC.predict(x_test)
y_pred
0: learn: 0.6750508 total: 19.3ms remaining: 1.91s 99: learn: 0.1308255 total: 325ms remaining: 0us
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0,
0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1,
1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0,
1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1,
1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1])
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 1.00 0.95 0.97 100
1 0.95 1.00 0.98 98
accuracy 0.97 198
macro avg 0.98 0.97 0.97 198
weighted avg 0.98 0.97 0.97 198
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, y_pred)
print("Model Accuracy =",acc*100,"%")
Model Accuracy = 97.47474747474747 %
cm = confusion_matrix(y_test, y_pred)
%matplotlib inline
# Plot confusion matrix
class_names = ['Cad','Normal']
df_cm = pd.DataFrame(cm, index = [i for i in class_names], columns = [i for i in class_names])
sns.heatmap(df_cm, annot = True)
cmap = plt.cm.Blues
plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
# Model Accuracy
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, y_pred)
print("Model Accuracy =",acc*100,"%")
Model Accuracy = 97.47474747474747 %
df_comp = pd.DataFrame({'Actual':y_test , 'Predict':y_pred})
df_comp
| Actual | Predict | |
|---|---|---|
| 0 | 0 | 0 |
| 1 | 0 | 0 |
| 2 | 0 | 0 |
| 3 | 0 | 0 |
| 4 | 0 | 0 |
| ... | ... | ... |
| 193 | 1 | 1 |
| 194 | 1 | 1 |
| 195 | 0 | 0 |
| 196 | 1 | 1 |
| 197 | 1 | 1 |
198 rows × 2 columns
plt.title (' Actual & Predict ',color = 'r')
sns.heatmap(df_comp)
<Axes: title={'center': ' Actual & Predict '}>
import lightgbm as lgb
clf = lgb.LGBMClassifier()
clf.fit(x_train, y_train)
y_pred=clf.predict(x_test)
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 1.00 1.00 1.00 100
1 1.00 1.00 1.00 98
accuracy 1.00 198
macro avg 1.00 1.00 1.00 198
weighted avg 1.00 1.00 1.00 198